/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.imhotep.web;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.primitives.Longs;
import com.indeed.imhotep.api.ImhotepOutOfMemoryException;
import com.indeed.imhotep.client.ImhotepClient;
import com.indeed.imhotep.client.ShardIdWithVersion;
import com.indeed.imhotep.ez.EZImhotepSession;
import com.indeed.imhotep.iql.GroupStats;
import com.indeed.imhotep.iql.IQLQuery;
import com.indeed.imhotep.iql.cache.QueryCache;
import com.indeed.imhotep.metadata.DatasetMetadata;
import com.indeed.imhotep.metadata.FieldMetadata;
import com.indeed.imhotep.metadata.FieldType;
import com.indeed.imhotep.sql.IQLTranslator;
import com.indeed.imhotep.sql.ast2.DescribeStatement;
import com.indeed.imhotep.sql.ast2.FromClause;
import com.indeed.imhotep.sql.ast2.GroupByClause;
import com.indeed.imhotep.sql.ast2.IQLStatement;
import com.indeed.imhotep.sql.ast2.SelectClause;
import com.indeed.imhotep.sql.ast2.SelectStatement;
import com.indeed.imhotep.sql.ast2.ShowStatement;
import com.indeed.imhotep.sql.parser.StatementParser;
import com.indeed.util.core.io.Closeables2;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.servlet.ServletException;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
/**
* @author dwahler
*/
@Controller
public class QueryServlet {
// Process-wide defaults applied once at class load: IQL treats all times as GMT-6,
// configured for both Joda-Time and java.util so date handling agrees everywhere.
// NOTE(review): these are JVM-global mutations, not servlet-local — confirm no other
// component in this JVM expects a different default zone.
static {
DateTimeZone.setDefault(DateTimeZone.forOffsetHours(-6));
TimeZone.setDefault(TimeZone.getTimeZone("GMT-6"));
GlobalUncaughtExceptionHandler.register();
}
private static final Logger log = Logger.getLogger(QueryServlet.class);
// separate logger for structured query log entries emitted by logQuery()
private static final Logger dataLog = Logger.getLogger("indeed.logentry");
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
// suffix of the cache file holding query metadata next to the result file
private static final String METADATA_FILE_SUFFIX = ".meta";
// this can be incremented to invalidate the old cache
private static final byte VERSION_FOR_HASHING = 1;
// request parameters worth recording in the query log entry
private static final Set<String> USED_PARAMS = Sets.newHashSet("view", "sync", "csv", "json", "interactive", "nocache");
private final ImhotepClient imhotepClient;
// separate client used when the request is flagged as interactive
private final ImhotepClient imhotepInteractiveClient;
private final ImhotepMetadataCache metadata;
private final TopTermsCache topTermsCache;
private final QueryCache queryCache;
private final ExecutionManager executionManager;
// runs background work: cache uploads and asynchronous query executions
private final ExecutorService executorService;
private final long imhotepLocalTempFileSizeLimit;
private final long imhotepDaemonTempFileSizeLimit;
/**
 * Constructs the servlet with its Spring-injected collaborators.
 * Side effect: mutates the static {@code EZImhotepSession.GROUP_LIMIT} with the
 * supplied row limit, so the last-constructed instance wins globally.
 */
@Autowired
public QueryServlet(ImhotepClient imhotepClient,
ImhotepClient imhotepInteractiveClient,
ImhotepMetadataCache metadata,
TopTermsCache topTermsCache,
QueryCache queryCache,
ExecutionManager executionManager,
ExecutorService executorService,
Integer rowLimit,
Long imhotepLocalTempFileSizeLimit,
Long imhotepDaemonTempFileSizeLimit) {
this.imhotepClient = imhotepClient;
this.imhotepInteractiveClient = imhotepInteractiveClient;
this.metadata = metadata;
this.topTermsCache = topTermsCache;
this.queryCache = queryCache;
this.executionManager = executionManager;
this.executorService = executorService;
this.imhotepLocalTempFileSizeLimit = imhotepLocalTempFileSizeLimit;
this.imhotepDaemonTempFileSizeLimit = imhotepDaemonTempFileSizeLimit;
EZImhotepSession.GROUP_LIMIT = rowLimit; // global, not per-instance
}
@RequestMapping("/query")
protected void doGet(final HttpServletRequest req, final HttpServletResponse resp,
@Nonnull @RequestParam("q") String query) throws ServletException, IOException {
final String httpUserName = getUserNameFromRequest(req);
final String userName = Strings.nullToEmpty(Strings.isNullOrEmpty(httpUserName) ? req.getParameter("username") : httpUserName);
long queryStartTimestamp = System.currentTimeMillis();
final boolean json = req.getParameter("json") != null;
IQLStatement parsedQuery = null;
SelectExecutionStats selectExecutionStats = null;
Throwable errorOccurred = null;
try {
if(Strings.isNullOrEmpty(req.getParameter("client")) && Strings.isNullOrEmpty(userName)) {
throw new IdentificationRequiredException("IQL query requests have to include parameters 'client' and 'username' for identification");
}
parsedQuery = StatementParser.parse(query, metadata);
if(parsedQuery instanceof SelectStatement) {
logQueryToLog4J(query, (Strings.isNullOrEmpty(userName) ? req.getRemoteAddr() : userName), -1);
final ExecutionManager.QueryTracker queryTracker = executionManager.queryStarted(query, userName);
try {
queryTracker.acquireLocks(); // blocks and waits if necessary
queryStartTimestamp = System.currentTimeMillis(); // ignore time spent waiting
// actually process
final SelectRequestArgs selectRequestArgs = new SelectRequestArgs(req, userName);
selectExecutionStats = handleSelectStatement(selectRequestArgs, resp, (SelectStatement) parsedQuery, queryTracker);
} finally {
// this must be closed. but we may have to defer it to the async thread finishing query processing
if(!queryTracker.isAsynchronousRelease()) {
Closeables2.closeQuietly(queryTracker, log);
}
}
} else if(parsedQuery instanceof DescribeStatement) {
handleDescribeStatement(req, resp, (DescribeStatement)parsedQuery);
} else if(parsedQuery instanceof ShowStatement) {
handleShowStatement(req, resp);
} else {
throw new RuntimeException("Query parsing failed: unknown statement type");
}
} catch (Throwable e) {
final boolean progress = req.getParameter("progress") != null;
handleError(resp, json, e, true, progress);
errorOccurred = e;
} finally {
try {
String remoteAddr = getForwardedForIPAddress(req);
if(remoteAddr == null) {
remoteAddr = req.getRemoteAddr();
}
logQuery(req, query, userName, queryStartTimestamp, parsedQuery, selectExecutionStats, errorOccurred, remoteAddr);
} catch (Throwable ignored) { }
}
}
/**
 * Gets the value associated with the last X-Forwarded-For header in the request. WARNING: the contract of HttpServletRequest does not assert anything about
 * the order in which the header values will be returned. I have examined the Tomcat source to establish that it does return the values in order, but this
 * behavior should not be assumed from other servlet containers.
 *
 * @param req request
 * @return the X-Forwarded-For IP address or null if none
 */
private static String getForwardedForIPAddress(final HttpServletRequest req) {
    return getForwardedForIPAddress(req, "X-Forwarded-For");
}

/**
 * Returns the value of the last occurrence of the given header, or null when the
 * header is absent. See the ordering caveat on the single-argument overload.
 *
 * @param req request
 * @param forwardForHeaderName name of the header to scan
 * @return the last header value seen, or null if none
 */
private static String getForwardedForIPAddress(final HttpServletRequest req, final String forwardForHeaderName) {
    // use the generic Enumeration<String> (Servlet 3.0+) instead of a raw type + cast
    final Enumeration<String> headers = req.getHeaders(forwardForHeaderName);
    String value = null;
    while (headers.hasMoreElements()) {
        value = headers.nextElement();
    }
    return value;
}
/**
 * Gets the user name from the HTTP request if it was provided through Basic
 * authentication or the custom "user " Authorization scheme, falling back to the
 * container-provided remote user when no Authorization header is present.
 *
 * @param request Http request
 * @return User name if available or null otherwise
 */
private static String getUserNameFromRequest(final HttpServletRequest request) {
    final String authHeader = request.getHeader("Authorization");
    if (authHeader == null) {
        // no Authorization header: fall back to the container's authenticated user (may be null)
        return request.getRemoteUser();
    }
    final String credStr;
    if (authHeader.startsWith("user ")) {
        // custom scheme: "user <username[:password]>"
        credStr = authHeader.substring(5);
    } else {
        // Basic auth: compare the scheme case-insensitively without locale-dependent
        // case conversion (toUpperCase() misbehaves under e.g. the Turkish locale)
        if (!authHeader.regionMatches(true, 0, "BASIC ", 0, 6)) {
            // Not basic
            return null;
        }
        // remove "BASIC " prefix
        final String credEncoded = authHeader.substring(6); //length of 'BASIC '
        // decode explicitly as UTF-8 rather than the platform default charset
        final byte[] credRaw = Base64.decodeBase64(credEncoded.getBytes(UTF8_CHARSET));
        if (credRaw == null) {
            // invalid decoding
            return null;
        }
        credStr = new String(credRaw, UTF8_CHARSET);
    }
    // get username part from username:password
    final String[] parts = credStr.split(":");
    if (parts.length < 1) {
        // bad split (e.g. the credentials were just ":")
        return null;
    }
    return parts[0];
}
/**
 * Executes a SELECT statement and writes the result to the HTTP response.
 * Handles four paths: HEAD-only (headers, no execution), synchronous cached read,
 * synchronous execution (with optional background cache upload), and asynchronous
 * execution (returns a JSON pointer to the future cached result).
 *
 * @return execution statistics used by the query logger
 */
private SelectExecutionStats handleSelectStatement(final SelectRequestArgs args, final HttpServletResponse resp, SelectStatement parsedQuery, final ExecutionManager.QueryTracker queryTracker) throws IOException {
// hashing is done before calling translate so only original JParsec parsing is considered
final String queryForHashing = parsedQuery.toHashKeyString();
final IQLQuery iqlQuery = IQLTranslator.translate(parsedQuery, args.interactive ? imhotepInteractiveClient : imhotepClient,
args.imhotepUserName, metadata, imhotepLocalTempFileSizeLimit, imhotepDaemonTempFileSizeLimit);
// TODO: handle requested format mismatch: e.g. cached CSV but asked for TSV shouldn't have to rerun the query
final String queryHash = getQueryHash(queryForHashing, iqlQuery.getShardVersionList(), args.csv);
final String cacheFileName = queryHash + (args.csv ? ".csv" : ".tsv");
final boolean isCached = queryCache.isFileCached(cacheFileName);
// collect metadata to be sent as response headers and/or a progress "header" event
final QueryMetadata queryMetadata = new QueryMetadata();
queryMetadata.addItem("IQL-Cached", isCached, true);
final DateTime newestShard = getLatestShardVersion(iqlQuery.getShardVersionList());
queryMetadata.addItem("IQL-Newest-Shard", newestShard, args.returnNewestShardVersion);
final String shardList = shardListToString(iqlQuery.getShardVersionList());
queryMetadata.addItem("IQL-Shard-List", shardList, args.returnShardlist);
final List<Interval> timeIntervalsMissingShards= iqlQuery.getTimeIntervalsMissingShards();
if(timeIntervalsMissingShards.size() > 0) {
final String missingIntervals = intervalListToString(timeIntervalsMissingShards);
queryMetadata.addItem("IQL-Missing-Shards", missingIntervals);
}
queryMetadata.setPendingHeaders(resp);
// HEAD request: only the metadata headers are wanted, skip execution entirely
if (args.headOnly) {
return new SelectExecutionStats(true);
}
final ServletOutputStream outputStream = resp.getOutputStream();
if (args.progress) {
outputStream.print(": This is the start of the IQL Query Stream\n\n");
}
if (!args.asynchronous) {
ResultServlet.setContentType(resp, args.avoidFileSave, args.csv, args.progress);
// fast path: stream the previously cached result instead of re-running the query
if (!args.cacheReadDisabled && isCached) {
log.trace("Returning cached data in " + cacheFileName);
// read metadata from cache
try {
final InputStream metadataCacheStream = queryCache.getInputStream(cacheFileName + METADATA_FILE_SUFFIX);
final QueryMetadata cachedMetadata = QueryMetadata.fromStream(metadataCacheStream);
queryMetadata.mergeIn(cachedMetadata);
queryMetadata.setPendingHeaders(resp);
resp.setHeader("Access-Control-Expose-Headers", StringUtils.join(resp.getHeaderNames(), ", "));
if(args.progress) {
outputStream.println("event: header");
outputStream.print("data: ");
outputStream.print(queryMetadata.toJSON() + "\n\n");
}
} catch (Exception e) {
// metadata cache miss is non-fatal; still serve the cached result rows
log.info("Failed to load metadata cache from " + cacheFileName + METADATA_FILE_SUFFIX, e);
}
final InputStream cacheInputStream = queryCache.getInputStream(cacheFileName);
final int rowsWritten = IQLQuery.copyStream(cacheInputStream, outputStream, iqlQuery.getRowLimit(), args.progress);
outputStream.close();
return new SelectExecutionStats(isCached, rowsWritten, false, queryHash, 0);
}
final IQLQuery.WriteResults writeResults;
final IQLQuery.ExecutionResult executionResult;
try {
// TODO: should we always get totals? opt out http param?
executionResult = iqlQuery.execute(args.progress, outputStream, true);
queryMetadata.addItem("IQL-Timings", executionResult.getTimings().replace('\n', '\t'), args.progress);
queryMetadata.addItem("IQL-Imhotep-Temp-Bytes-Written", executionResult.getImhotepTempFilesBytesWritten(), args.progress);
queryMetadata.addItem("IQL-Totals", Arrays.toString(executionResult.getTotals()), args.getTotals);
queryMetadata.setPendingHeaders(resp);
resp.setHeader("Access-Control-Expose-Headers", StringUtils.join(resp.getHeaderNames(), ", "));
if(args.progress) {
outputStream.println("event: header");
outputStream.print("data: ");
outputStream.print(queryMetadata.toJSON() + "\n\n");
}
final Iterator<GroupStats> groupStats = executionResult.getRows();
// column counts are needed by the result writer; default to 1 when clauses are absent
final int groupingColumns = Math.max(1, (parsedQuery.groupBy == null || parsedQuery.groupBy.groupings == null) ? 1 : parsedQuery.groupBy.groupings.size());
final int selectColumns = Math.max(1, (parsedQuery.select == null || parsedQuery.select.getProjections() == null) ? 1 : parsedQuery.select.getProjections().size());
if(!args.asynchronous) {
writeResults = iqlQuery.outputResults(groupStats, outputStream, args.csv, args.progress, iqlQuery.getRowLimit(), groupingColumns, selectColumns, args.cacheWriteDisabled);
} else {
writeResults = new IQLQuery.WriteResults(0, null, groupStats, 0);
}
// upload to the cache in the background so the client isn't blocked on it
if (!args.cacheWriteDisabled && !isCached) {
executorService.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
try {
try {
final OutputStream metadataCacheStream = queryCache.getOutputStream(cacheFileName + METADATA_FILE_SUFFIX);
queryMetadata.toStream(metadataCacheStream);
metadataCacheStream.close();
} catch (Exception e) {
// cache upload failures are best-effort; the client already has the results
log.warn("Failed to upload metadata cache: " + cacheFileName, e);
}
try {
uploadResultsToCache(writeResults, cacheFileName, args.csv);
} catch (Exception e) {
log.warn("Failed to upload cache: " + cacheFileName, e);
}
} finally {
Closeables2.closeQuietly(queryTracker, log);
}
return null;
}
});
queryTracker.markAsynchronousRelease(); // going to be closed asynchronously after cache is uploaded
}
} catch (ImhotepOutOfMemoryException e) {
throw Throwables.propagate(e);
} finally {
Closeables2.closeQuietly(iqlQuery, log);
}
outputStream.close();
return new SelectExecutionStats(isCached, writeResults, queryHash, executionResult.getImhotepTempFilesBytesWritten());
} else {
// TODO: rework the async case to use the same code path as the sync case above except running under an executor
if (!isCached && args.cacheWriteDisabled) {
throw new IllegalStateException("Query cache is disabled so only synchronous calls can be served");
}
resp.setContentType("application/json");
if (!isCached) {
// run the query in the background, writing results straight to the cache
executorService.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
try {
// TODO: get totals working with the cache
final IQLQuery.ExecutionResult executionResult = iqlQuery.execute(false, null, false);
final Iterator<GroupStats> groupStats = executionResult.getRows();
final OutputStream cacheStream = queryCache.getOutputStream(cacheFileName);
IQLQuery.writeRowsToStream(groupStats, cacheStream, args.csv, Integer.MAX_VALUE, false);
cacheStream.close(); // has to be closed
return null;
} finally {
Closeables2.closeQuietly(iqlQuery, log);
Closeables2.closeQuietly(queryTracker, log);
}
}
});
queryTracker.markAsynchronousRelease(); // going to be closed asynchronously after cache is uploaded
}
// respond immediately with the URL where the result will appear
final URL baseURL = new URL(args.requestURL);
final URL resultsURL = new URL(baseURL, "results/" + cacheFileName);
final ObjectMapper mapper = new ObjectMapper();
final ObjectNode ret = mapper.createObjectNode();
ret.put("filename", resultsURL.toString());
mapper.writeValue(outputStream, ret);
outputStream.close();
// we don't know number of rows as it's handled asynchronously
return new SelectExecutionStats(isCached, new IQLQuery.WriteResults(0, null, null, 0), queryHash, 0);
}
}
// Formatter for shard version timestamps (yyyyMMddHHmmss) in the app-wide GMT-6 offset.
private static final DateTimeFormatter yyyymmddhhmmss = DateTimeFormat.forPattern("yyyyMMddHHmmss").withZone(DateTimeZone.forOffsetHours(-6));

/**
 * Returns the build time of the most recently built shard, or null when the list
 * is null/empty or no shard carries a positive version number.
 */
@Nullable
private static DateTime getLatestShardVersion(List<ShardIdWithVersion> shardVersionList) {
    if (shardVersionList == null || shardVersionList.isEmpty()) {
        return null;
    }
    long newestVersion = 0;
    for (final ShardIdWithVersion shard : shardVersionList) {
        final long version = shard.getVersion();
        if (version > newestVersion) {
            newestVersion = version;
        }
    }
    // version 0 means no shard had a real version timestamp
    return newestVersion == 0 ? null : yyyymmddhhmmss.parseDateTime(String.valueOf(newestVersion));
}
/**
 * Formats the shards as a comma-separated "shardId.version" list; empty string for null input.
 */
private static String shardListToString(List<ShardIdWithVersion> shardVersionList) {
    if (shardVersionList == null) {
        return "";
    }
    final StringBuilder result = new StringBuilder();
    String separator = "";
    for (final ShardIdWithVersion shard : shardVersionList) {
        result.append(separator).append(shard.getShardId()).append(".").append(shard.getVersion());
        separator = ",";
    }
    return result.toString();
}
// Formatter for interval endpoints (yyyyMMdd.HH) in the app-wide GMT-6 offset.
private static final DateTimeFormatter yyyymmddhh = DateTimeFormat.forPattern("yyyyMMdd.HH").withZone(DateTimeZone.forOffsetHours(-6));

/**
 * Formats the intervals as a comma-separated "start-end" list; empty string for null input.
 */
private static String intervalListToString(List<Interval> intervals) {
    if (intervals == null) {
        return "";
    }
    final StringBuilder result = new StringBuilder();
    String separator = "";
    for (final Interval interval : intervals) {
        result.append(separator)
                .append(interval.getStart().toString(yyyymmddhh))
                .append("-")
                .append(interval.getEnd().toString(yyyymmddhh));
        separator = ",";
    }
    return result.toString();
}
/**
 * Uploads the query result to the cache, preferring the in-memory row iterator and
 * falling back to the on-disk temp file when the result overflowed to disk.
 *
 * @param writeResults result of writing the response to the client
 * @param cachedFileName cache key file name
 * @param csv whether to serialize rows as CSV (TSV otherwise)
 * @throws IOException on cache write failure
 */
private void uploadResultsToCache(IQLQuery.WriteResults writeResults, String cachedFileName, boolean csv) throws IOException {
    if (writeResults.resultCacheIterator != null) {
        // use the memory cached data
        final OutputStream cacheStream = queryCache.getOutputStream(cachedFileName);
        try {
            IQLQuery.writeRowsToStream(writeResults.resultCacheIterator, cacheStream, csv, Integer.MAX_VALUE, false);
        } finally {
            cacheStream.close(); // close even when the write fails, to avoid leaking the stream
        }
    } else if (writeResults.unsortedFile != null) {
        // cache overflowed to disk so read from file
        try {
            queryCache.writeFromFile(cachedFileName, writeResults.unsortedFile);
        } finally {
            if (!writeResults.unsortedFile.delete()) {
                log.info("Failed to delete: " + writeResults.unsortedFile.getPath());
            }
        }
    } else { // this should never happen
        log.warn("Results are not available to upload cache to HDFS: " + cachedFileName);
    }
}
/**
 * Immutable statistics for one SELECT execution, consumed by the query logger.
 */
private static class SelectExecutionStats {
// whether the result was served from the query cache
public final boolean cached;
public final int rowsWritten;
// whether writing the result overflowed from memory to a temp file on disk
public final boolean overflowedToDisk;
// cache key hash of the query; empty for HEAD-only requests
public final String hashForCaching;
public final long imhotepTempFilesBytesWritten;
// true when only headers were requested and the query was not executed
public final boolean headOnly;
// constructor for HEAD-only requests: nothing was executed or written
private SelectExecutionStats(boolean headOnly) {
this.headOnly = headOnly;
hashForCaching = "";
overflowedToDisk = false;
rowsWritten = 0;
cached = false;
imhotepTempFilesBytesWritten = 0;
}
// constructor for the cached-read path where row count is known directly
private SelectExecutionStats(boolean cached, int rowsWritten, boolean overflowedToDisk, String hashForCaching, long imhotepTempFilesBytesWritten) {
this.cached = cached;
this.rowsWritten = rowsWritten;
this.overflowedToDisk = overflowedToDisk;
this.hashForCaching = hashForCaching;
this.imhotepTempFilesBytesWritten = imhotepTempFilesBytesWritten;
this.headOnly = false;
}
// constructor for the execution path where stats come from the result writer
private SelectExecutionStats(boolean cached, IQLQuery.WriteResults writeResults, String hashForCaching, long imhotepTempFilesBytesWritten) {
this.cached = cached;
this.hashForCaching = hashForCaching;
this.imhotepTempFilesBytesWritten = imhotepTempFilesBytesWritten;
this.rowsWritten = writeResults.rowsWritten;
this.overflowedToDisk = writeResults.didOverflowToDisk();
this.headOnly = false;
}
}
/**
 * Routes a DESCRIBE statement: with a field it describes that field, otherwise the dataset.
 */
private void handleDescribeStatement(HttpServletRequest req, HttpServletResponse resp, DescribeStatement parsedQuery) throws IOException {
    final boolean describeWholeDataset = Strings.isNullOrEmpty(parsedQuery.field);
    if (describeWholeDataset) {
        handleDescribeDataset(req, resp, parsedQuery);
    } else {
        handleDescribeField(req, resp, parsedQuery);
    }
}
/**
 * Writes a field description: field metadata plus its top terms as JSON when the
 * "json" parameter is set, otherwise the top terms as plain text, one per line.
 */
private void handleDescribeField(HttpServletRequest req, HttpServletResponse resp, DescribeStatement parsedQuery) throws IOException {
    final ServletOutputStream outputStream = resp.getOutputStream();
    final String dataset = parsedQuery.dataset;
    final String fieldName = parsedQuery.field;
    final List<String> topTerms = topTermsCache.getTopTerms(dataset, fieldName);
    FieldMetadata field = metadata.getDataset(dataset).getField(fieldName);
    if (field == null) {
        // substitute a placeholder so the JSON response still has a field object
        field = new FieldMetadata("notfound", FieldType.String);
        field.setDescription("Field not found");
    }
    if (req.getParameter("json") == null) {
        // plain text: one top term per line
        for (final String term : topTerms) {
            outputStream.println(term);
        }
    } else {
        resp.setContentType(MediaType.APPLICATION_JSON_VALUE);
        final ObjectMapper mapper = new ObjectMapper();
        mapper.configure(SerializationConfig.Feature.INDENT_OUTPUT, true);
        final ObjectNode jsonRoot = mapper.createObjectNode();
        field.toJSON(jsonRoot);
        final ArrayNode termsArray = mapper.createArrayNode();
        jsonRoot.put("topTerms", termsArray);
        for (final String term : topTerms) {
            termsArray.add(term);
        }
        mapper.writeValue(outputStream, jsonRoot);
    }
    outputStream.close();
}
/**
 * Writes a dataset description: full metadata as JSON when the "json" parameter is
 * set, otherwise "fieldName<TAB>description" lines as plain text.
 */
private void handleDescribeDataset(HttpServletRequest req, HttpServletResponse resp, DescribeStatement parsedQuery) throws IOException {
    final ServletOutputStream outputStream = resp.getOutputStream();
    final DatasetMetadata datasetMetadata = metadata.getDataset(parsedQuery.dataset);
    if (req.getParameter("json") == null) {
        // plain text: one field per line with its (possibly empty) description
        for (final FieldMetadata field : datasetMetadata.getFields().values()) {
            final String description = Strings.nullToEmpty(field.getDescription());
            outputStream.println(field.getName() + "\t" + description);
        }
    } else {
        resp.setContentType(MediaType.APPLICATION_JSON_VALUE);
        final ObjectMapper mapper = new ObjectMapper();
        mapper.configure(SerializationConfig.Feature.INDENT_OUTPUT, true);
        final ObjectNode jsonRoot = mapper.createObjectNode();
        datasetMetadata.toJSON(jsonRoot, mapper, false);
        mapper.writeValue(outputStream, jsonRoot);
    }
    outputStream.close();
}
/**
 * Writes the list of available datasets: a JSON array of dataset summaries when the
 * "json" parameter is set, otherwise one dataset name per line as plain text.
 */
private void handleShowStatement(final HttpServletRequest req, final HttpServletResponse resp) throws IOException {
    final ServletOutputStream outputStream = resp.getOutputStream();
    if (req.getParameter("json") == null) {
        // plain text: one dataset name per line
        for (final DatasetMetadata dataset : metadata.getDatasets().values()) {
            outputStream.println(dataset.getName());
        }
    } else {
        resp.setContentType(MediaType.APPLICATION_JSON_VALUE);
        final ObjectMapper mapper = new ObjectMapper();
        final ObjectNode jsonRoot = mapper.createObjectNode();
        final ArrayNode datasetArray = mapper.createArrayNode();
        jsonRoot.put("datasets", datasetArray);
        for (final DatasetMetadata dataset : metadata.getDatasets().values()) {
            final ObjectNode datasetInfo = mapper.createObjectNode();
            dataset.toJSON(datasetInfo, mapper, true);
            datasetArray.add(datasetInfo);
        }
        mapper.writeValue(outputStream, jsonRoot);
    }
    outputStream.close();
}
/**
 * Produces a Base64 encoded SHA-1 hash of the query and the list of shard names/versions which has to be sorted.
 * The query text is normalized (trimmed, quotes unified, whitespace collapsed) so
 * cosmetically different queries share a cache entry. The csv flag is part of the
 * hash so CSV and TSV results get distinct cache files.
 */
private String getQueryHash(String query, Collection<ShardIdWithVersion> shards, boolean csv) {
final MessageDigest sha1;
try {
sha1 = MessageDigest.getInstance("SHA-1");
}
catch(NoSuchAlgorithmException e) {
log.error("Failed to init SHA1", e);
throw Throwables.propagate(e);
}
// normalize so formatting differences don't defeat the cache
final String standardizedQuery = query.trim().replace('"', '\'').replaceAll("\\s+", " ");
sha1.update(standardizedQuery.getBytes(UTF8_CHARSET));
if(shards != null) {
for(ShardIdWithVersion shard : shards) {
sha1.update(shard.getShardId().getBytes(UTF8_CHARSET));
sha1.update(Longs.toByteArray(shard.getVersion()));
// NOTE(review): the csv flag is hashed once per shard here (and not at all when the
// shard list is null/empty) — looks unintentional, but moving it outside the loop
// would change every existing cache key; bump VERSION_FOR_HASHING if fixing this.
sha1.update(csv ? (byte)1 : 0);
}
}
sha1.update(VERSION_FOR_HASHING);
return Base64.encodeBase64URLSafeString(sha1.digest());
}
/**
 * Writes an error response for a failed query. Non-JSON clients get either an SSE
 * "servererror" event (when isEventStream) or a plain stack trace with status 500;
 * JSON clients get a serialized ErrorResult object with parse location info if available.
 *
 * @param status500 whether to set HTTP status 500 on the JSON response
 * @throws IOException on response write failure
 */
static void handleError(HttpServletResponse resp, boolean json, Throwable e, boolean status500, boolean isEventStream) throws IOException {
    if (!(e instanceof Exception || e instanceof OutOfMemoryError)) {
        // VM errors other than OOM are not reportable to the client; rethrow
        throw Throwables.propagate(e);
    }
    // output parse/execute error
    if (!json) {
        final ServletOutputStream outputStream = resp.getOutputStream();
        final PrintStream printStream = new PrintStream(outputStream);
        if (isEventStream) {
            resp.setContentType("text/event-stream");
            final String[] stackTrace = Throwables.getStackTraceAsString(e).split("\\n");
            printStream.println("event: servererror");
            for (String s : stackTrace) {
                printStream.println("data: " + s);
            }
            printStream.println();
            // PrintStream buffers internally; without a flush the error event may never
            // reach the client since this branch doesn't close the stream
            printStream.flush();
        } else {
            resp.setStatus(500);
            e.printStackTrace(printStream);
            printStream.close();
        }
    } else {
        if (status500) {
            resp.setStatus(500);
        }
        // construct a parsed error object to be JSON serialized
        String clause = "";
        int offset = -1;
        if (e instanceof IQLParseException) {
            final IQLParseException parseException = (IQLParseException) e;
            clause = parseException.getClause();
            offset = parseException.getOffsetInClause();
        }
        final String stackTrace = Throwables.getStackTraceAsString(Throwables.getRootCause(e));
        final ErrorResult error = new ErrorResult(e.getClass().getSimpleName(), e.getMessage(), stackTrace, clause, offset);
        resp.setContentType("application/json");
        final ObjectMapper jsonMapper = new ObjectMapper();
        final ServletOutputStream outputStream = resp.getOutputStream();
        jsonMapper.defaultPrettyPrintingWriter().writeValue(outputStream, error);
        outputStream.close();
    }
}
// Logging code below
private static final int QUERY_LENGTH_LIMIT = 55000; // trying to not cause the logentry to overflow from being larger than 2^16
/**
 * Emits a structured log entry for a completed (or failed) request to the dataLog
 * logger, and re-logs slow queries to log4j with their duration.
 */
private void logQuery(HttpServletRequest req,
String query,
String userName,
long queryStartTimestamp,
IQLStatement parsedQuery,
SelectExecutionStats selectExecutionStats,
Throwable errorOccurred,
String remoteAddr) {
final long timeTaken = System.currentTimeMillis() - queryStartTimestamp;
if(timeTaken > 5000) { // we've already logged the query so only log again if it took a long time to run
logQueryToLog4J(query, (Strings.isNullOrEmpty(userName) ? remoteAddr : userName), timeTaken);
}
final String client = Strings.nullToEmpty(req.getParameter("client"));
final QueryLogEntry logEntry = new QueryLogEntry();
logEntry.setProperty("v", 0); // log entry format version
logEntry.setProperty("username", userName);
logEntry.setProperty("client", client);
logEntry.setProperty("raddr", Strings.nullToEmpty(remoteAddr));
logEntry.setProperty("starttime", Long.toString(queryStartTimestamp));
logEntry.setProperty("tottime", (int)timeTaken);
// record which of the recognized request parameters were present
final List<String> params = Lists.newArrayList();
final Enumeration<String> paramsEnum = req.getParameterNames();
while(paramsEnum.hasMoreElements()) {
final String param = paramsEnum.nextElement();
if(USED_PARAMS.contains(param)) {
params.add(param);
}
}
logEntry.setProperty("params", Joiner.on(' ').join(params));
// truncate very long queries but keep the original length for analysis
final String queryToLog = query.length() > QUERY_LENGTH_LIMIT ? query.substring(0, QUERY_LENGTH_LIMIT) : query;
logEntry.setProperty("q", queryToLog);
logEntry.setProperty("qlen", query.length());
logEntry.setProperty("error", errorOccurred != null ? "1" : "0");
if(errorOccurred != null) {
logEntry.setProperty("exceptiontype", errorOccurred.getClass().getSimpleName());
logEntry.setProperty("exceptionmsg", errorOccurred.getMessage());
}
// add statement-type-specific properties and record the statement type itself
final String queryType = logStatementData(parsedQuery, selectExecutionStats, logEntry);
logEntry.setProperty("statement", queryType);
dataLog.info(logEntry);
}
/**
 * Adds statement-type-specific properties to the log entry and returns the statement
 * type label ("select", "describe", "show", or "invalid" for null/unknown statements).
 */
private String logStatementData(IQLStatement parsedQuery,
SelectExecutionStats selectExecutionStats,
QueryLogEntry logEntry) {
    if (parsedQuery == null) {
        return "invalid";
    }
    if (parsedQuery instanceof SelectStatement) {
        logSelectStatementData((SelectStatement) parsedQuery, selectExecutionStats, logEntry);
        return "select";
    }
    if (parsedQuery instanceof DescribeStatement) {
        final DescribeStatement describeStatement = (DescribeStatement) parsedQuery;
        logEntry.setProperty("dataset", describeStatement.dataset);
        if (describeStatement.field != null) {
            logEntry.setProperty("field", describeStatement.field);
        }
        return "describe";
    }
    if (parsedQuery instanceof ShowStatement) {
        return "show";
    }
    return "invalid";
}
/**
 * Adds SELECT-specific properties (dataset, time range, clause sizes, and execution
 * statistics when available) to the log entry.
 */
private void logSelectStatementData(SelectStatement selectStatement,
SelectExecutionStats selectExecutionStats,
QueryLogEntry logEntry) {
    final FromClause from = selectStatement.from;
    if (from != null) {
        logEntry.setProperty("dataset", from.getDataset());
        if (from.getStart() != null && from.getEnd() != null) {
            logEntry.setProperty("days", new Duration(from.getStart(), from.getEnd()).getStandardDays());
        }
    }
    final SelectClause select = selectStatement.select;
    final int selectCount = (select != null && select.getProjections() != null)
            ? select.getProjections().size() : 0;
    logEntry.setProperty("selectcnt", selectCount);
    final GroupByClause groupBy = selectStatement.groupBy;
    final int groupByCount = (groupBy != null && groupBy.groupings != null)
            ? groupBy.groupings.size() : 0;
    logEntry.setProperty("groupbycnt", groupByCount);
    if (selectExecutionStats != null) {
        logEntry.setProperty("cached", selectExecutionStats.cached ? "1" : "0");
        logEntry.setProperty("rows", selectExecutionStats.rowsWritten);
        logEntry.setProperty("disk", selectExecutionStats.overflowedToDisk ? "1" : "0");
        logEntry.setProperty("hash", selectExecutionStats.hashForCaching);
        logEntry.setProperty("head", selectExecutionStats.headOnly ? "1" : "0");
        // convert bytes to megabytes
        logEntry.setProperty("ftgsmb", selectExecutionStats.imhotepTempFilesBytesWritten / 1024 / 1024);
    }
}
/**
 * Logs the query to log4j, abbreviating long parenthesized clauses first.
 * A negative timeTaken marks a pre-execution entry ("+" prefix, no duration);
 * otherwise a post-execution entry ("-" prefix with the duration in ms).
 */
private void logQueryToLog4J(String query, String identification, long timeTaken) {
    if (query.length() > 500) {
        query = query.replaceAll("\\(([^\\)]{0,100}+)[^\\)]+\\)", "\\($1\\.\\.\\.\\)");
    }
    final boolean beforeExecution = timeTaken < 0;
    final String timeTakenStr = beforeExecution ? "" : String.valueOf(timeTaken);
    log.info((beforeExecution ? "+" : "-") + identification + "\t" + timeTakenStr + "\t" + query);
}
/**
 * Thrown when a request provides neither a 'client' nor a 'username' parameter.
 */
public static class IdentificationRequiredException extends RuntimeException {
    // explicit serialVersionUID since RuntimeException is Serializable
    private static final long serialVersionUID = 1L;

    public IdentificationRequiredException(String message) {
        super(message);
    }
}
/**
 * Request options for a SELECT query, parsed once from the HTTP request.
 * Non-static inner class: reads the outer servlet's queryCache to decide cache flags.
 */
private class SelectRequestArgs {
// skip the Content-Disposition attachment behavior (only meaningful when synchronous)
public final boolean avoidFileSave;
// run the query in the background and return a pointer to the future cached result
public final boolean asynchronous;
// serialize results as CSV instead of TSV
public final boolean csv;
// route the query to the interactive Imhotep client
public final boolean interactive;
public final boolean returnShardlist;
public final boolean returnNewestShardVersion;
public final boolean cacheReadDisabled;
public final boolean cacheWriteDisabled;
// return only metadata headers without executing the query
public final boolean headOnly;
// stream progress as server-sent events
public final boolean progress;
public final boolean getTotals;
// identity string passed to Imhotep, prefixed with "IQL:"
public final String imhotepUserName;
public final String requestURL;
public SelectRequestArgs(HttpServletRequest req, String userName) {
asynchronous = req.getParameter("async") != null;
avoidFileSave = req.getParameter("view") != null && !this.asynchronous;
csv = req.getParameter("csv") != null;
interactive = req.getParameter("interactive") != null;
returnShardlist = req.getParameter("getshardlist") != null;
returnNewestShardVersion = req.getParameter("getversion") != null;
// cache use requires the cache to be enabled AND not opted out of per-request
cacheReadDisabled = !queryCache.isEnabled() || req.getParameter("nocacheread") != null || req.getParameter("nocache") != null;
cacheWriteDisabled = !queryCache.isEnabled() || req.getParameter("nocachewrite") != null || req.getParameter("nocache") != null;
headOnly = "HEAD".equals(req.getMethod()) || req.getParameter("head") != null;
progress = req.getParameter("progress") != null;
getTotals = req.getParameter("totals") != null;
final String clientName = Strings.nullToEmpty(req.getParameter("client"));
imhotepUserName = "IQL:" + (!Strings.isNullOrEmpty(userName) ? userName : clientName);
requestURL = req.getRequestURL().toString();
}
}
}